AddN
对多个输入张量进行逐元素相加,并将结果输出;支持标量广播。
数学表达式为:
\[\text{output} = \sum_{i=0}^{tensor\_num-1}\text{inputs[i]}\]
- 输入:
inputs - 输入数据地址数组。
tensor_num - 输入张量数量。
element_nums - 输入张量元素数量数组。
core_mask - 核掩码(仅适用于共享存储版本)。
- 输出:
output - 计算结果地址。
- 支持平台:
FT78NE、MT7004
备注
FT78NE 支持 fp32、fp64、int8、int16、int32、cplx64、cplx128 类型;
MT7004 支持 fp16、fp32、int16、int32、cplx64 类型。
共享存储版本:
- void hp_addn_s(half **inputs, half *output, int tensor_num, int *element_nums, int core_mask)
- void fp_addn_s(float **inputs, float *output, int tensor_num, int *element_nums, int core_mask)
- void dp_addn_s(double **inputs, double *output, int tensor_num, int *element_nums, int core_mask)
- void i8_addn_s(int8_t **inputs, int8_t *output, int tensor_num, int *element_nums, int core_mask)
- void i16_addn_s(int16_t **inputs, int16_t *output, int tensor_num, int *element_nums, int core_mask)
- void i32_addn_s(int32_t **inputs, int32_t *output, int tensor_num, int *element_nums, int core_mask)
- void c64_addn_s(float **inputs, float *output, int tensor_num, int *element_nums, int core_mask)
- void c128_addn_s(double **inputs, double *output, int tensor_num, int *element_nums, int core_mask)
C调用示例:
// FT78NE 示例
#include <stdio.h>
#include <addn.h>

int main(int argc, char* argv[]) {
    float *input0 = (float *)0xA0000000;
    float *input1 = (float *)0xA0010000;
    float **inputs = (float **)0xB0000000;
    inputs[0] = input0;
    inputs[1] = input1;
    float *output = (float *)0xC0000000;
    int tensor_num = 2;
    int *element_nums = (int *)0xB0001000;
    element_nums[0] = 1024;
    element_nums[1] = 1024;
    int core_mask = 0xff;
    fp_addn_s(inputs, output, tensor_num, element_nums, core_mask);
    return 0;
}
私有存储版本:
- void hp_addn_p(half **inputs, half *output, int tensor_num, int *element_nums)
- void fp_addn_p(float **inputs, float *output, int tensor_num, int *element_nums)
- void dp_addn_p(double **inputs, double *output, int tensor_num, int *element_nums)
- void i8_addn_p(int8_t **inputs, int8_t *output, int tensor_num, int *element_nums)
- void i16_addn_p(int16_t **inputs, int16_t *output, int tensor_num, int *element_nums)
- void i32_addn_p(int32_t **inputs, int32_t *output, int tensor_num, int *element_nums)
- void c64_addn_p(float **inputs, float *output, int tensor_num, int *element_nums)
- void c128_addn_p(double **inputs, double *output, int tensor_num, int *element_nums)
C调用示例:
// FT78NE 示例
#include <stdio.h>
#include <addn.h>

int main(int argc, char* argv[]) {
    float *input0 = (float *)0x10810000; // L2 空间
    float *input1 = (float *)0x10814000;
    float **inputs = (float **)0x10818000;
    inputs[0] = input0;
    inputs[1] = input1;
    float *output = (float *)0x10820000;
    int tensor_num = 2;
    int *element_nums = (int *)0x1082C000;
    element_nums[0] = 1024;
    element_nums[1] = 1024;
    fp_addn_p(inputs, output, tensor_num, element_nums);
    return 0;
}